{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from blazingsql import BlazingContext\n",
    "from dask.distributed import Client\n",
    "from dask_cuda import LocalCUDACluster\n",
    "from dask_cuda import initialize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "enable_tcp_over_ucx = True\n",
    "enable_nvlink = False\n",
    "enable_infiniband = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"UCX_LOG_LEVEL\"] = \"TRACE\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:bokeh.server.util:Host wildcard '*' will allow connections originating from multiple (or possibly all) hostnames or IPs. Use non-wildcard values to restrict access explicitly\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table style=\"border: 2px solid white;\">\n",
       "<tr>\n",
       "<td style=\"vertical-align: top; border: 0px solid white\">\n",
       "<h3 style=\"text-align: left;\">Client</h3>\n",
       "<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
       "  <li><b>Scheduler: </b>ucx://127.0.0.1:39001</li>\n",
       "  <li><b>Dashboard: </b><a href='http://127.0.0.1:8787/status' target='_blank'>http://127.0.0.1:8787/status</a></li>\n",
       "</ul>\n",
       "</td>\n",
       "<td style=\"vertical-align: top; border: 0px solid white\">\n",
       "<h3 style=\"text-align: left;\">Cluster</h3>\n",
       "<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
       "  <li><b>Workers: </b>2</li>\n",
       "  <li><b>Cores: </b>2</li>\n",
       "  <li><b>Memory: </b>50.39 GB</li>\n",
       "</ul>\n",
       "</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<Client: 'ucx://127.0.0.1:39001' processes=2 threads=2, memory=50.39 GB>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "initialize.initialize(create_cuda_context=True,\n",
    "                      enable_tcp_over_ucx=enable_tcp_over_ucx,\n",
    "                      enable_nvlink=enable_nvlink,\n",
    "                      enable_infiniband=enable_infiniband)\n",
    "cluster = LocalCUDACluster(protocol=\"ucx\",\n",
    "                           enable_tcp_over_ucx=enable_tcp_over_ucx,\n",
    "                           enable_nvlink=enable_nvlink,\n",
    "                           enable_infiniband=enable_infiniband)\n",
    "\n",
    "client = Client(cluster)\n",
    "client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cscotun0: flags=4305<UP,POINTOPOINT,RUNNING,NOARP,MULTICAST>  mtu 1406\n",
      "        inet 10.2.167.188  netmask 255.255.240.0  destination 10.2.167.188\n",
      "        inet6 fe80::49e9:cde6:a28d:ee99  prefixlen 64  scopeid 0x20<link>\n",
      "        unspec 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00  txqueuelen 500  (UNSPEC)\n",
      "        RX packets 788042  bytes 739765483 (705.4 MiB)\n",
      "        RX errors 0  dropped 0  overruns 0  frame 0\n",
      "        TX packets 609922  bytes 128185359 (122.2 MiB)\n",
      "        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0\n",
      "\n",
      "enp1s0f0u9c4i2: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500\n",
      "        ether 9a:00:c6:5e:cc:d2  txqueuelen 1000  (Ethernet)\n",
      "        RX packets 0  bytes 0 (0.0 B)\n",
      "        RX errors 0  dropped 0  overruns 0  frame 0\n",
      "        TX packets 0  bytes 0 (0.0 B)\n",
      "        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0\n",
      "\n",
      "enp5s0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500\n",
      "        inet 192.168.50.250  netmask 255.255.255.0  broadcast 192.168.50.255\n",
      "        inet6 fe80::502c:ff78:1930:67a3  prefixlen 64  scopeid 0x20<link>\n",
      "        ether 10:7b:44:92:72:4e  txqueuelen 1000  (Ethernet)\n",
      "        RX packets 1223856  bytes 1058019089 (1009.0 MiB)\n",
      "        RX errors 0  dropped 0  overruns 0  frame 0\n",
      "        TX packets 893310  bytes 237277827 (226.2 MiB)\n",
      "        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0\n",
      "        device memory 0xfa500000-fa51ffff  \n",
      "\n",
      "lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536\n",
      "        inet 127.0.0.1  netmask 255.0.0.0\n",
      "        inet6 ::1  prefixlen 128  scopeid 0x10<host>\n",
      "        loop  txqueuelen 1000  (Local Loopback)\n",
      "        RX packets 1352445  bytes 16863625961 (15.7 GiB)\n",
      "        RX errors 0  dropped 0  overruns 0  frame 0\n",
      "        TX packets 1352445  bytes 16863625961 (15.7 GiB)\n",
      "        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0\n",
      "\n",
      "tun0: flags=4305<UP,POINTOPOINT,RUNNING,NOARP,MULTICAST>  mtu 1500\n",
      "        inet 10.8.1.1  netmask 255.255.255.255  destination 10.8.1.2\n",
      "        inet6 fe80::e978:31d2:b4ed:cdf8  prefixlen 64  scopeid 0x20<link>\n",
      "        unspec 00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00  txqueuelen 100  (UNSPEC)\n",
      "        RX packets 0  bytes 0 (0.0 B)\n",
      "        RX errors 0  dropped 0  overruns 0  frame 0\n",
      "        TX packets 3521  bytes 694422 (678.1 KiB)\n",
      "        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0\n",
      "\n",
      "virbr0: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500\n",
      "        inet 192.168.122.1  netmask 255.255.255.0  broadcast 192.168.122.255\n",
      "        ether 52:54:00:ac:a7:a5  txqueuelen 1000  (Ethernet)\n",
      "        RX packets 0  bytes 0 (0.0 B)\n",
      "        RX errors 0  dropped 0  overruns 0  frame 0\n",
      "        TX packets 0  bytes 0 (0.0 B)\n",
      "        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!ifconfig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "starting polling thread\n",
      "starting listeners\n",
      "started listeners\n",
      "blazing_log\n",
      "BlazingContext ready\n"
     ]
    }
   ],
   "source": [
    "bc = BlazingContext(network_interface=\"enp5s0\", dask_client=client, config_options={\"MAX_JOIN_SCATTER_MEM_OVERHEAD\": 1})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cudf\n",
    "import numpy as np\n",
    "df = cudf.DataFrame({chr(x): cudf.Series(np.arange(4172, dtype=\"float64\")) for x in range(65, 66)})\n",
    "\n",
    "import dask_cudf\n",
    "\n",
    "ddf = dask_cudf.from_cudf(df, npartitions=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wed Jul 15 19:12:29 2020       \n",
      "+-----------------------------------------------------------------------------+\n",
      "| NVIDIA-SMI 440.33.01    Driver Version: 440.33.01    CUDA Version: 10.2     |\n",
      "|-------------------------------+----------------------+----------------------+\n",
      "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
      "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
      "|===============================+======================+======================|\n",
      "|   0  Quadro GV100        Off  | 00000000:42:00.0 Off |                  Off |\n",
      "| 37%   50C    P2    45W / 250W |   1324MiB / 32508MiB |      2%      Default |\n",
      "+-------------------------------+----------------------+----------------------+\n",
      "|   1  Quadro GV100        Off  | 00000000:43:00.0  On |                  Off |\n",
      "| 45%   59C    P2    52W / 250W |   1493MiB / 32505MiB |      1%      Default |\n",
      "+-------------------------------+----------------------+----------------------+\n",
      "                                                                               \n",
      "+-----------------------------------------------------------------------------+\n",
      "| Processes:                                                       GPU Memory |\n",
      "|  GPU       PID   Type   Process name                             Usage      |\n",
      "|=============================================================================|\n",
      "|    0     19933      C   ...oftware/miniconda3/envs/bsql/bin/python   657MiB |\n",
      "|    0     20041      C   ...oftware/miniconda3/envs/bsql/bin/python   655MiB |\n",
      "|    1      4132      G   /usr/bin/X                                   271MiB |\n",
      "|    1      4397      G   /usr/bin/gnome-shell                         179MiB |\n",
      "|    1      5033      G   ...quest-channel-token=1920258119881352545    65MiB |\n",
      "|    1      8628      G   ...AAAAAAAAAAAAAAgAAAAAAAAA --shared-files   306MiB |\n",
      "|    1     20037      C   ...oftware/miniconda3/envs/bsql/bin/python   655MiB |\n",
      "+-----------------------------------------------------------------------------+\n"
     ]
    }
   ],
   "source": [
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "ddf = ddf.persist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "bc.create_table(\"test\", ddf)\n",
    "bc.create_table(\"test2\", ddf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "running on ucx://127.0.0.1:43864\n",
      "running on ucx://127.0.0.1:60372\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2081</th>\n",
       "      <td>4167.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2082</th>\n",
       "      <td>4168.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2083</th>\n",
       "      <td>4169.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2084</th>\n",
       "      <td>4170.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2085</th>\n",
       "      <td>4171.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4172 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           A\n",
       "0        0.0\n",
       "1        1.0\n",
       "2        2.0\n",
       "3        3.0\n",
       "4        4.0\n",
       "...      ...\n",
       "2081  4167.0\n",
       "2082  4168.0\n",
       "2083  4169.0\n",
       "2084  4170.0\n",
       "2085  4171.0\n",
       "\n",
       "[4172 rows x 1 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bc.sql(\"select * from test\").compute()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "running on ucx://127.0.0.1:43864\n",
      "running on ucx://127.0.0.1:60372\n"
     ]
    }
   ],
   "source": [
    "bc.sql(\"select test.A, test2.A from test full outer join test2 on test2.A = test.A \").compute()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Error found\n",
      "fail: \n",
      " org.apache.calcite.runtime.CalciteContextException: At line 1, column 34: Column 'b' not found in any table\n",
      "Parsing Error\n"
     ]
    },
    {
     "ename": "AttributeError",
     "evalue": "'NoneType' object has no attribute 'compute'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-29-507ebc044753>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msql\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"select avg(a) from test group by b\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'compute'"
     ]
    }
   ],
   "source": [
    "bc.sql(\"select avg(a) from test group by b\").compute()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_dir = \"/home/cjnolet/Downloads/data/tpch/\"\n",
    "bc.create_table(\"orders\",data_dir + \"orders*.parquet\")\n",
    "bc.create_table(\"nation\",data_dir + \"nation*.parquet\")\n",
    "bc.create_table(\"supplier\",data_dir + \"supplier*.parquet\")\n",
    "bc.create_table(\"region\",data_dir + \"region*.parquet\")\n",
    "bc.create_table(\"partsupp\",data_dir + \"partsupp*.parquet\")\n",
    "bc.create_table(\"part\",data_dir + \"part_*.parquet\")\n",
    "bc.create_table(\"lineitem\",data_dir + \"lineitem*.parquet\")\n",
    "bc.create_table(\"customer\",data_dir + \"customer*.parquet\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "running on ucx://127.0.0.1:48212\n",
      "running on ucx://127.0.0.1:50104\n"
     ]
    }
   ],
   "source": [
    "print(bc.sql(\"\"\"\n",
    "select COALESCE(orders.o_orderkey, 100),\n",
    "                    COALESCE(orders.o_totalprice, 0.01) from customer\n",
    "                    full outer join orders\n",
    "                    on customer.c_custkey = orders.o_custkey\n",
    "                    where customer.c_nationkey = 3\n",
    "                    and customer.c_custkey < 3\n",
    "                    LIMIT 2\n",
    "\"\"\").compute())\n",
    "print(bc.sql(\"\"\"\n",
    "select o_orderkey, count(o_custkey) from orders group by o_orderkey\n",
    "\"\"\").compute())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (bsql)",
   "language": "python",
   "name": "bsql"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
